{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/snap/google-cloud-sdk/126/lib/third_party/requests/__init__.py:83: RequestsDependencyWarning: Old version of cryptography ([1, 2, 3]) may cause slowdown.\n",
      "  warnings.warn(warning, RequestsDependencyWarning)\n",
      "Copying gs://mesolitica-general/albert-base-actual/model.ckpt-400000.data-00000-of-00001...\n",
      "| [1 files][138.2 MiB/138.2 MiB]                                                \n",
      "Operation completed over 1 objects/138.2 MiB.                                    \n",
      "/snap/google-cloud-sdk/126/lib/third_party/requests/__init__.py:83: RequestsDependencyWarning: Old version of cryptography ([1, 2, 3]) may cause slowdown.\n",
      "  warnings.warn(warning, RequestsDependencyWarning)\n",
      "Copying gs://mesolitica-general/albert-base-actual/model.ckpt-400000.index...\n",
      "/ [1 files][  2.0 KiB/  2.0 KiB]                                                \n",
      "Operation completed over 1 objects/2.0 KiB.                                      \n",
      "/snap/google-cloud-sdk/126/lib/third_party/requests/__init__.py:83: RequestsDependencyWarning: Old version of cryptography ([1, 2, 3]) may cause slowdown.\n",
      "  warnings.warn(warning, RequestsDependencyWarning)\n",
      "Copying gs://mesolitica-general/albert-base-actual/model.ckpt-400000.meta...\n",
      "/ [1 files][  2.1 MiB/  2.1 MiB]                                                \n",
      "Operation completed over 1 objects/2.1 MiB.                                      \n"
     ]
    }
   ],
   "source": [
    "# # !mkdir out\n",
    "!gsutil cp gs://mesolitica-general/albert-base-actual/model.ckpt-400000.data-00000-of-00001 out\n",
    "!gsutil cp gs://mesolitica-general/albert-base-actual/model.ckpt-400000.index out\n",
    "!gsutil cp gs://mesolitica-general/albert-base-actual/model.ckpt-400000.meta out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mkdir: cannot create directory ‘albert-base-2020-04-10’: File exists\n",
      "albert-base-2020-04-10/\n",
      "albert-base-2020-04-10/model.ckpt-400000.index\n",
      "albert-base-2020-04-10/config.json\n",
      "albert-base-2020-04-10/model.ckpt-400000.meta\n",
      "albert-base-2020-04-10/sp10m.cased.v10.model\n",
      "albert-base-2020-04-10/sp10m.cased.v10.vocab\n",
      "albert-base-2020-04-10/model.ckpt-400000.data-00000-of-00001\n"
     ]
    }
   ],
   "source": [
    "!mkdir albert-base-2020-04-10\n",
    "!cp sp10m.cased.v10.* albert-base-2020-04-10\n",
    "!cp BASE_config.json albert-base-2020-04-10/config.json\n",
    "!cp out/model.ckpt-400000* albert-base-2020-04-10\n",
    "!tar cvzf albert-base-2020-04-10.tar.gz albert-base-2020-04-10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import modeling\n",
    "import optimization\n",
    "import tokenization\n",
    "import tensorflow as tf\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip3 install sentencepiece"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:loading sentence piece model\n"
     ]
    }
   ],
   "source": [
    "tokenizer = tokenization.FullTokenizer(\n",
    "      vocab_file='sp10m.cased.v10.vocab', do_lower_case=False,\n",
    "      spm_model_file='sp10m.cased.v10.model')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['▁Hu', 'se', 'in', '▁comel']"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer.tokenize('Husein comel')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<modeling.AlbertConfig at 0x7faaeb5c9da0>"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "albert_config = modeling.AlbertConfig.from_json_file('BASE_config.json')\n",
    "albert_config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gather_indexes(sequence_tensor, positions):\n",
    "    \"\"\"Gathers the vectors at the specific positions over a minibatch.\"\"\"\n",
    "    sequence_shape = modeling.get_shape_list(sequence_tensor, expected_rank=3)\n",
    "    batch_size = sequence_shape[0]\n",
    "    seq_length = sequence_shape[1]\n",
    "    width = sequence_shape[2]\n",
    "\n",
    "    flat_offsets = tf.reshape(\n",
    "      tf.range(0, batch_size, dtype=tf.int32) * seq_length, [-1, 1])\n",
    "    flat_positions = tf.reshape(positions + flat_offsets, [-1])\n",
    "    flat_sequence_tensor = tf.reshape(sequence_tensor,\n",
    "                                    [batch_size * seq_length, width])\n",
    "    output_tensor = tf.gather(flat_sequence_tensor, flat_positions)\n",
    "    return output_tensor\n",
    "\n",
    "class Model:\n",
    "    def __init__(\n",
    "        self,\n",
    "    ):\n",
    "        self.X = tf.placeholder(tf.int32, [None, None])\n",
    "        self.segment_ids = tf.placeholder(tf.int32, [None, None])\n",
    "        self.input_masks = tf.placeholder(tf.int32, [None, None])\n",
    "        \n",
    "        model = modeling.AlbertModel(\n",
    "            config=albert_config,\n",
    "            is_training=False,\n",
    "            input_ids=self.X,\n",
    "            input_mask=self.input_masks,\n",
    "            token_type_ids=self.segment_ids,\n",
    "            use_one_hot_embeddings=False)\n",
    "        \n",
    "        input_tensor = model.get_sequence_output()\n",
    "        output_weights = model.get_embedding_table()\n",
    "        \n",
    "        with tf.variable_scope(\"cls/predictions\"):\n",
    "            with tf.variable_scope(\"transform\"):\n",
    "                input_tensor = tf.layers.dense(\n",
    "                              input_tensor,\n",
    "                              units=albert_config.embedding_size,\n",
    "                              activation=modeling.get_activation(albert_config.hidden_act),\n",
    "                              kernel_initializer=modeling.create_initializer(\n",
    "                                  albert_config.initializer_range))\n",
    "                input_tensor = modeling.layer_norm(input_tensor)\n",
    "            \n",
    "            output_bias = tf.get_variable(\n",
    "                \"output_bias\",\n",
    "                shape=[albert_config.vocab_size],\n",
    "                initializer=tf.zeros_initializer())\n",
    "            logits = tf.matmul(input_tensor, output_weights, transpose_b=True)\n",
    "            logits = tf.nn.bias_add(logits, output_bias)\n",
    "            log_probs = tf.nn.log_softmax(logits, axis=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /home/ubuntu/notebook/albert/modeling.py:256: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use keras.layers.Dense instead.\n",
      "WARNING:tensorflow:From /home/ubuntu/.local/lib/python3.6/site-packages/tensorflow_core/python/layers/core.py:187: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use `layer.__call__` method instead.\n"
     ]
    }
   ],
   "source": [
    "tf.reset_default_graph()\n",
    "sess = tf.InteractiveSession()\n",
    "model = Model()\n",
    "\n",
    "sess.run(tf.global_variables_initializer())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Restoring parameters from out/model.ckpt-400000\n"
     ]
    }
   ],
   "source": [
    "var_lists = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope = 'bert')\n",
    "cls = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope = 'cls')\n",
    "saver = tf.train.Saver(var_list = var_lists + cls)\n",
    "saver.restore(sess, 'out/model.ckpt-400000')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'albert-base/model.ckpt'"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "saver = tf.train.Saver(tf.trainable_variables())\n",
    "saver.save(sess, 'albert-base/model.ckpt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !cp sp10m.cased.v10.* albert-base\n",
    "# !cp BASE_config.json albert-base/config.json\n",
    "# !tar cvzf albert-base.tar.gz albert-base"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "out = 'albert-base-bahasa-cased'\n",
    "os.makedirs(out, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AlbertTokenizer, AlbertModel, AlbertConfig, AutoTokenizer, AutoModelWithLMHead, pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('albert-base-bahasa-cased/spiece.model',\n",
       " 'albert-base-bahasa-cased/special_tokens_map.json',\n",
       " 'albert-base-bahasa-cased/added_tokens.json')"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer = AlbertTokenizer('sp10m.cased.v10.model', do_lower_case = False)\n",
    "tokenizer.save_pretrained('albert-base-bahasa-cased')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import logging\n",
    "from transformers import AlbertConfig, AlbertForMaskedLM, load_tf_weights_in_albert\n",
    "\n",
    "\n",
    "logging.basicConfig(level=logging.INFO)\n",
    "\n",
    "\n",
    "def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, albert_config_file, pytorch_dump_path):\n",
    "    # Initialise PyTorch model\n",
    "    config = AlbertConfig.from_json_file(albert_config_file)\n",
    "    print(\"Building PyTorch model from configuration: {}\".format(str(config)))\n",
    "    model = AlbertForMaskedLM(config)\n",
    "\n",
    "    # Load weights from tf checkpoint\n",
    "    load_tf_weights_in_albert(model, config, tf_checkpoint_path)\n",
    "\n",
    "    # Save pytorch-model\n",
    "    print(\"Save PyTorch model to {}\".format(pytorch_dump_path))\n",
    "    torch.save(model.state_dict(), pytorch_dump_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Building PyTorch model from configuration: AlbertConfig {\n",
      "  \"_num_labels\": 2,\n",
      "  \"architectures\": null,\n",
      "  \"attention_probs_dropout_prob\": 0,\n",
      "  \"bos_token_id\": 2,\n",
      "  \"classifier_dropout_prob\": 0.1,\n",
      "  \"decoder_start_token_id\": null,\n",
      "  \"do_sample\": false,\n",
      "  \"down_scale_factor\": 1,\n",
      "  \"early_stopping\": false,\n",
      "  \"embedding_size\": 128,\n",
      "  \"eos_token_id\": 3,\n",
      "  \"finetuning_task\": null,\n",
      "  \"gap_size\": 0,\n",
      "  \"hidden_act\": \"gelu\",\n",
      "  \"hidden_dropout_prob\": 0,\n",
      "  \"hidden_size\": 768,\n",
      "  \"id2label\": {\n",
      "    \"0\": \"LABEL_0\",\n",
      "    \"1\": \"LABEL_1\"\n",
      "  },\n",
      "  \"initializer_range\": 0.02,\n",
      "  \"inner_group_num\": 1,\n",
      "  \"intermediate_size\": 3072,\n",
      "  \"is_decoder\": false,\n",
      "  \"is_encoder_decoder\": false,\n",
      "  \"label2id\": {\n",
      "    \"LABEL_0\": 0,\n",
      "    \"LABEL_1\": 1\n",
      "  },\n",
      "  \"layer_norm_eps\": 1e-12,\n",
      "  \"length_penalty\": 1.0,\n",
      "  \"max_length\": 20,\n",
      "  \"max_position_embeddings\": 512,\n",
      "  \"min_length\": 0,\n",
      "  \"model_type\": \"albert\",\n",
      "  \"net_structure_type\": 0,\n",
      "  \"no_repeat_ngram_size\": 0,\n",
      "  \"num_attention_heads\": 12,\n",
      "  \"num_beams\": 1,\n",
      "  \"num_hidden_groups\": 1,\n",
      "  \"num_hidden_layers\": 12,\n",
      "  \"num_memory_blocks\": 0,\n",
      "  \"num_return_sequences\": 1,\n",
      "  \"output_attentions\": false,\n",
      "  \"output_hidden_states\": false,\n",
      "  \"output_past\": true,\n",
      "  \"pad_token_id\": 0,\n",
      "  \"prefix\": null,\n",
      "  \"pruned_heads\": {},\n",
      "  \"repetition_penalty\": 1.0,\n",
      "  \"task_specific_params\": null,\n",
      "  \"temperature\": 1.0,\n",
      "  \"top_k\": 50,\n",
      "  \"top_p\": 1.0,\n",
      "  \"torchscript\": false,\n",
      "  \"type_vocab_size\": 2,\n",
      "  \"use_bfloat16\": false,\n",
      "  \"vocab_size\": 32000\n",
      "}\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:transformers.modeling_albert:Converting TensorFlow checkpoint from /home/ubuntu/notebook/albert/albert-base/model.ckpt\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/embeddings/LayerNorm/beta with shape [128]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/embeddings/LayerNorm/gamma with shape [128]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/embeddings/position_embeddings with shape [512, 128]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/embeddings/token_type_embeddings with shape [2, 128]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/embeddings/word_embeddings with shape [32000, 128]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/embedding_hidden_mapping_in/bias with shape [768]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/embedding_hidden_mapping_in/kernel with shape [128, 768]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta with shape [768]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma with shape [768]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias with shape [768]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel with shape [768, 768]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias with shape [768]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel with shape [768, 768]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias with shape [768]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel with shape [768, 768]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias with shape [768]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel with shape [768, 768]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias with shape [3072]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel with shape [768, 3072]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias with shape [768]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel with shape [3072, 768]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/pooler/dense/bias with shape [768]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/pooler/dense/kernel with shape [768, 768]\n",
      "INFO:transformers.modeling_albert:Loading TF weight cls/predictions/output_bias with shape [32000]\n",
      "INFO:transformers.modeling_albert:Loading TF weight cls/predictions/transform/LayerNorm/beta with shape [128]\n",
      "INFO:transformers.modeling_albert:Loading TF weight cls/predictions/transform/LayerNorm/gamma with shape [128]\n",
      "INFO:transformers.modeling_albert:Loading TF weight cls/predictions/transform/dense/bias with shape [128]\n",
      "INFO:transformers.modeling_albert:Loading TF weight cls/predictions/transform/dense/kernel with shape [768, 128]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "bert/embeddings/LayerNorm/beta\n",
      "bert/embeddings/LayerNorm/gamma\n",
      "bert/embeddings/position_embeddings\n",
      "bert/embeddings/token_type_embeddings\n",
      "bert/embeddings/word_embeddings\n",
      "bert/encoder/embedding_hidden_mapping_in/bias\n",
      "bert/encoder/embedding_hidden_mapping_in/kernel\n",
      "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta\n",
      "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma\n",
      "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta\n",
      "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma\n",
      "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias\n",
      "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel\n",
      "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias\n",
      "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel\n",
      "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias\n",
      "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel\n",
      "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias\n",
      "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel\n",
      "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias\n",
      "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel\n",
      "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias\n",
      "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel\n",
      "bert/pooler/dense/bias\n",
      "bert/pooler/dense/kernel\n",
      "cls/predictions/output_bias\n",
      "cls/predictions/transform/LayerNorm/beta\n",
      "cls/predictions/transform/LayerNorm/gamma\n",
      "cls/predictions/transform/dense/bias\n",
      "cls/predictions/transform/dense/kernel\n",
      "Initialize PyTorch weight ['albert', 'embeddings', 'LayerNorm', 'beta'] from bert/embeddings/LayerNorm/beta\n",
      "Initialize PyTorch weight ['albert', 'embeddings', 'LayerNorm', 'gamma'] from bert/embeddings/LayerNorm/gamma\n",
      "Initialize PyTorch weight ['albert', 'embeddings', 'position_embeddings'] from bert/embeddings/position_embeddings\n",
      "Initialize PyTorch weight ['albert', 'embeddings', 'token_type_embeddings'] from bert/embeddings/token_type_embeddings\n",
      "Initialize PyTorch weight ['albert', 'embeddings', 'word_embeddings'] from bert/embeddings/word_embeddings\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'embedding_hidden_mapping_in', 'bias'] from bert/encoder/embedding_hidden_mapping_in/bias\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'embedding_hidden_mapping_in', 'kernel'] from bert/encoder/embedding_hidden_mapping_in/kernel\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'LayerNorm', 'beta'] from bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'LayerNorm', 'gamma'] from bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'full_layer_layer_norm', 'beta'] from bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'full_layer_layer_norm', 'gamma'] from bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'dense', 'bias'] from bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'dense', 'kernel'] from bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'key', 'bias'] from bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'key', 'kernel'] from bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'query', 'bias'] from bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'query', 'kernel'] from bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'value', 'bias'] from bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'value', 'kernel'] from bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'ffn', 'bias'] from bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'ffn', 'kernel'] from bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'ffn_output', 'bias'] from bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'ffn_output', 'kernel'] from bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel\n",
      "Initialize PyTorch weight ['albert', 'pooler', 'bias'] from bert/pooler/dense/bias\n",
      "Initialize PyTorch weight ['albert', 'pooler', 'kernel'] from bert/pooler/dense/kernel\n",
      "Initialize PyTorch weight ['predictions', 'output_bias'] from cls/predictions/output_bias\n",
      "Initialize PyTorch weight ['predictions', 'LayerNorm', 'beta'] from cls/predictions/transform/LayerNorm/beta\n",
      "Initialize PyTorch weight ['predictions', 'LayerNorm', 'gamma'] from cls/predictions/transform/LayerNorm/gamma\n",
      "Initialize PyTorch weight ['predictions', 'dense', 'bias'] from cls/predictions/transform/dense/bias\n",
      "Initialize PyTorch weight ['predictions', 'dense', 'kernel'] from cls/predictions/transform/dense/kernel\n",
      "Save PyTorch model to albert-base-bahasa-cased/pytorch_model.bin\n"
     ]
    }
   ],
   "source": [
    "convert_tf_checkpoint_to_pytorch('albert-base/model.ckpt', \n",
    "                                 'BASE_config.json', \n",
    "                                 'albert-base-bahasa-cased/pytorch_model.bin')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:transformers.tokenization_utils:Model name './albert-base-bahasa-cased' not found in model shortcut name list (albert-base-v1, albert-large-v1, albert-xlarge-v1, albert-xxlarge-v1, albert-base-v2, albert-large-v2, albert-xlarge-v2, albert-xxlarge-v2). Assuming './albert-base-bahasa-cased' is a path, a model identifier, or url to a directory containing tokenizer files.\n",
      "INFO:transformers.tokenization_utils:Didn't find file ./albert-base-bahasa-cased/added_tokens.json. We won't load it.\n",
      "INFO:transformers.tokenization_utils:loading file ./albert-base-bahasa-cased/spiece.model\n",
      "INFO:transformers.tokenization_utils:loading file None\n",
      "INFO:transformers.tokenization_utils:loading file ./albert-base-bahasa-cased/special_tokens_map.json\n",
      "INFO:transformers.tokenization_utils:loading file ./albert-base-bahasa-cased/tokenizer_config.json\n"
     ]
    }
   ],
   "source": [
    "tokenizer = AlbertTokenizer.from_pretrained('./albert-base-bahasa-cased', do_lower_case = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "config = AlbertConfig('BASE_config.json')\n",
    "config.vocab_size = 32000\n",
    "config.intermediate_size = 3072\n",
    "config.hidden_size = 768\n",
    "config.num_attention_heads = 12\n",
    "config.num_hidden_groups = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:transformers.modeling_utils:loading weights file ./albert-base-bahasa-cased/pytorch_model.bin\n"
     ]
    }
   ],
   "source": [
    "model = AutoModelWithLMHead.from_pretrained('./albert-base-bahasa-cased/pytorch_model.bin', config = config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "fill_mask = pipeline('fill-mask', model=model, tokenizer=tokenizer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'sequence': '[CLS] tolonglah gov buat something, kami dah penat[SEP]',\n",
       "  'score': 0.11175338923931122,\n",
       "  'token': 1384},\n",
       " {'sequence': '[CLS] tolonglah gov buat something, kami dah berjaya[SEP]',\n",
       "  'score': 0.016671981662511826,\n",
       "  'token': 801},\n",
       " {'sequence': '[CLS] tolonglah gov buat something, kami dah malas[SEP]',\n",
       "  'score': 0.016193486750125885,\n",
       "  'token': 1311},\n",
       " {'sequence': '[CLS] tolonglah gov buat something, kami dah tahu[SEP]',\n",
       "  'score': 0.015381624922156334,\n",
       "  'token': 178},\n",
       " {'sequence': '[CLS] tolonglah gov buat something, kami dah takde[SEP]',\n",
       "  'score': 0.012386327609419823,\n",
       "  'token': 630}]"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fill_mask('tolonglah gov buat something, kami dah [MASK]')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:transformers.configuration_utils:Configuration saved in albert-base-bahasa-cased/config.json\n",
      "INFO:transformers.modeling_utils:Model weights saved in albert-base-bahasa-cased/pytorch_model.bin\n"
     ]
    }
   ],
   "source": [
    "model.save_pretrained('albert-base-bahasa-cased')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !transformers-cli upload ./albert-base-bahasa-cased"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:filelock:Lock 140089044183136 acquired on /home/ubuntu/.cache/torch/transformers/ea11a3ad24741e88ffe3afdba3b4e9f717f246fa1735f969817c4016c768ff34.c13d76cf88c72892944197024aec640d2cc3d97998a50fe9e30ee63acb76b15d.lock\n",
      "INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/huseinzol05/albert-base-bahasa-cased/pytorch_model.bin not found in cache or force_download set to True, downloading to /home/ubuntu/.cache/torch/transformers/tmpevyatob7\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "13ac5a8660b8473f9ac48bcb1ff76b32",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=48287920.0, style=ProgressStyle(descrip…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:transformers.file_utils:storing https://s3.amazonaws.com/models.huggingface.co/bert/huseinzol05/albert-base-bahasa-cased/pytorch_model.bin in cache at /home/ubuntu/.cache/torch/transformers/ea11a3ad24741e88ffe3afdba3b4e9f717f246fa1735f969817c4016c768ff34.c13d76cf88c72892944197024aec640d2cc3d97998a50fe9e30ee63acb76b15d\n",
      "INFO:transformers.file_utils:creating metadata file for /home/ubuntu/.cache/torch/transformers/ea11a3ad24741e88ffe3afdba3b4e9f717f246fa1735f969817c4016c768ff34.c13d76cf88c72892944197024aec640d2cc3d97998a50fe9e30ee63acb76b15d\n",
      "INFO:filelock:Lock 140089044183136 released on /home/ubuntu/.cache/torch/transformers/ea11a3ad24741e88ffe3afdba3b4e9f717f246fa1735f969817c4016c768ff34.c13d76cf88c72892944197024aec640d2cc3d97998a50fe9e30ee63acb76b15d.lock\n",
      "INFO:transformers.modeling_utils:loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/huseinzol05/albert-base-bahasa-cased/pytorch_model.bin from cache at /home/ubuntu/.cache/torch/transformers/ea11a3ad24741e88ffe3afdba3b4e9f717f246fa1735f969817c4016c768ff34.c13d76cf88c72892944197024aec640d2cc3d97998a50fe9e30ee63acb76b15d\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "model = AutoModelWithLMHead.from_pretrained('huseinzol05/albert-base-bahasa-cased', config = config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:transformers.tokenization_utils:Model name 'huseinzol05/albert-base-bahasa-cased' not found in model shortcut name list (albert-base-v1, albert-large-v1, albert-xlarge-v1, albert-xxlarge-v1, albert-base-v2, albert-large-v2, albert-xlarge-v2, albert-xxlarge-v2). Assuming 'huseinzol05/albert-base-bahasa-cased' is a path, a model identifier, or url to a directory containing tokenizer files.\n",
      "INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/huseinzol05/albert-base-bahasa-cased/spiece.model from cache at /home/ubuntu/.cache/torch/transformers/5e5d2b3ecd5e53c40b88133bc5ccf6c527407004bf26ac19df9764e2e196798c.62912bc1f6182c2bdac801dba22c51182bb7bdbc199b220c540bbb4dada8ed34\n",
      "INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/huseinzol05/albert-base-bahasa-cased/added_tokens.json from cache at None\n",
      "INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/huseinzol05/albert-base-bahasa-cased/special_tokens_map.json from cache at /home/ubuntu/.cache/torch/transformers/911d1525e9e81df5039bf87f977e65602993b0fe5946f8593a785493520a99ef.4f0d42b1849e2d6fd72c735fba48dff0d2f0a55f5d1961e79bcfce337d354167\n",
      "INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/huseinzol05/albert-base-bahasa-cased/tokenizer_config.json from cache at /home/ubuntu/.cache/torch/transformers/6496b95ff9cea1ac5f39ea6702cc9ae604802faba1a6a0f1aca1b3c10bbe0a4c.3889713104075cfee9e96090bcdd0dc753733b3db9da20d1dd8b2cd1030536a2\n"
     ]
    }
   ],
   "source": [
    "tokenizer = AlbertTokenizer.from_pretrained('huseinzol05/albert-base-bahasa-cased', do_lower_case = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'sequence': '[CLS] makan ayam dengan rendang[SEP]',\n",
       "  'score': 0.09598397463560104,\n",
       "  'token': 2451},\n",
       " {'sequence': '[CLS] makan ayam dengan ayam[SEP]',\n",
       "  'score': 0.048915520310401917,\n",
       "  'token': 629},\n",
       " {'sequence': '[CLS] makan ayam dengan nasi[SEP]',\n",
       "  'score': 0.041438233107328415,\n",
       "  'token': 453},\n",
       " {'sequence': '[CLS] makan ayam dengan sayur[SEP]',\n",
       "  'score': 0.03901659697294235,\n",
       "  'token': 1639},\n",
       " {'sequence': '[CLS] makan ayam dengan ikan[SEP]',\n",
       "  'score': 0.03564858436584473,\n",
       "  'token': 758}]"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fill_mask = pipeline('fill-mask', model=model, tokenizer=tokenizer)\n",
    "fill_mask('makan ayam dengan [MASK]')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
